************
* ANALYSIS *
************
* Load the replication dataset, replacing whatever is currently in memory.
use Thomson_Poland_Surveillance_PSRM_Replication_Data, clear

*************************
* Parallel Trends Graph *
*************************
* Yearly mean of merged_informants within each district_split group.
* The mean is restricted to problem_district==0 so it is computed on the same
* districts the plot selects. Because the mean is constant within a
* group-year, filtering only in the graph command would leave the problem
* districts inside the plotted averages.
* informants_treated is therefore missing for problem_district!=0 rows.
* NOTE(review): confirm nothing outside this section reads informants_treated
* for those rows.
sort district_split year
by district_split year: egen informants_treated = mean(merged_informants) if problem_district==0

* Plot 1 = district_split==1, plot 2 = district_split==0, years 1970-1980.
* The legend lists plot 2 first; xline() marks 1975.
graph twoway ///
    (connected informants_treated year if district_split==1 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(Dh) mcolor(black) lpattern(longdash) lwidth(medthick) lcolor(black)) ///
    (connected informants_treated year if district_split==0 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(Th) mcolor(black) lwidth(medthick) lcolor(black)), ///
    legend(order(2 "Control (No Split)" 1 "Treated (Split)") pos(6) size(medium)) ///
    bgcolor(white) graphregion(fcolor(white) lwidth(large)) xlabel(1970(1)1980) ///
    xtitle("Year") xline(1975, lwidth(medthin)) ytitle("Average Collaborators/District")
*graph export Poland_District_Informants_Trends_1.pdf, replace

* Yearly mean of merged_informants within each split_magnitude group (0-4).
* The mean is restricted to problem_district==0 so it is computed on the same
* districts the plot selects. Because the mean is constant within a
* group-year, filtering only in the graph command would leave the problem
* districts inside the plotted averages.
* informants_tograph is therefore missing for problem_district!=0 rows.
sort split_magnitude year
by split_magnitude year: egen informants_tograph = mean(merged_informants) if problem_district==0

* One connected series per split_magnitude value, years 1970-1980; xline() marks 1975.
* NOTE(review): the last series uses lwidth(med) while the others use
* lwidth(medthick). "med" is not one of the documented linewidth styles
* (medthin / medium / medthick) -- confirm which width was intended.
graph twoway ///
    (connected informants_tograph year if split_magnitude==0 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(Th) mcolor(black) lwidth(medthick) lcolor(black)) ///
    (connected informants_tograph year if split_magnitude==1 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(Dh) mcolor(black) lpattern(longdash) lwidth(medthick) lcolor(black)) ///
    (connected informants_tograph year if split_magnitude==2 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(Oh) mcolor(gray) lpattern(longdash) lwidth(medthick) lcolor(gray)) ///
    (connected informants_tograph year if split_magnitude==3 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(Sh) mcolor(black) lpattern(dot) lwidth(medthick) lcolor(black)) ///
    (connected informants_tograph year if split_magnitude==4 & problem_district==0 & year>1969 & year<1981, ///
        msymbol(dh) mcolor(black) lpattern(dash) lwidth(med) lcolor(black)), ///
    legend(order(1 "Control" 2 "Split (1)" 3 "Split (2)" 4 "Split (3)" 5 "Split (4)") pos(6) size(medium) cols(3)) ///
    bgcolor(white) graphregion(fcolor(white) lwidth(large)) xlabel(1970(1)1980) ///
    xtitle("Year") xline(1975, lwidth(medthin)) ytitle("Average Collaborators/District")
*graph export Poland_District_Informants_Trends_2.pdf, replace

****************************************
* Descriptive graphs of informant data *
****************************************

* Use the plotplain scheme for every graph drawn from here on.
set scheme plotplain

* Observations shared by both panels: problem_district==0, year 1957-1980.
* (Local macro: run this section as a unit so the macro is defined.)
local desc_sample problem_district==0 & year>1956 & year<1981

* Left-hand panel input: scatter of collaborators on pop_int with a linear fit.
twoway ///
    (scatter merged_informants pop_int if `desc_sample', msymbol(Oh)) ///
    (lfit merged_informants pop_int if `desc_sample', lpattern(solid) lwidth(medthick)), ///
    title("Collaborators vs Population", pos(12) size(medium)) ///
    ytitle("Collaborators") xtitle("Population") ///
    legend(off) xsize(6) ysize(4) graphregion(color(white)) bgcolor(white)
graph save collaborators_pop.gph, replace

* Histogram of collaborator counts: 30 bins, frequency scale.
histogram merged_informants if `desc_sample', bin(30) frequency ///
    title("Collaborator Numbers", pos(12) size(medium)) ///
    xtitle("Collaborators") ///
    legend(off) xsize(6) ysize(4) graphregion(color(white)) bgcolor(white)
graph save collaborators.gph, replace

* Put the two saved graphs side by side (histogram first).
graph combine collaborators.gph collaborators_pop.gph, ///
    cols(2) graphregion(color(white)) xsize(8) ysize(4)
*graph export Collaborators_Data.pdf, replace


* Detailed summary statistics for the listed variables,
* restricted to problem_district==0 and years 1957-1980.
local summ_vars merged_informants binary_treatment ///
    split_ord2 split_ord3 split_ord4 split_ord5 unrest_1976_bin
summarize `summ_vars' if problem_district==0 & year>1956 & year<1981, detail


************
* Analysis *
************
sort district_id year

**************************************************************
* Balance Tests/ Differences in split vs non-split districts *
**************************************************************
* For each 1965 outcome: regress it on district_split, take the predicted
* margins at district_split = 0 and 1, and save a marginsplot as
* <stub>_balance.gph. The six saved graphs are then combined into one figure.
* NOTE(review): ln_area is a log (generated below) but its y-axis title reads
* "Area Sq Km"; ln_pop and ln_popdens are presumably logs too -- confirm the
* axis titles are the intended ones.

* Log area is one of the balance outcomes.
gen ln_area = ln(area)

* Parallel lists: outcome variable, file stub, panel heading, y-axis title.
local balance_vars  ind_pc ln_area ln_pop schools investment ln_popdens
local balance_stubs ind area pop schools investment popdens
local balance_heads `" "Industry" "Area" "Population" "Primary Schools" "Investment" "Pop Density" "'
local balance_ylabs `" "Industry %" "Area Sq Km" "Population" "Schools/Cap" "Investment/Cap" "Population/Sq Km" "'

forvalues k = 1/6 {
    local outcome : word `k' of `balance_vars'
    local stub    : word `k' of `balance_stubs'
    local head    : word `k' of `balance_heads'
    local ylab    : word `k' of `balance_ylabs'

    regress `outcome' district_split if year==1965
    margins, at(district_split=(0 1))
    marginsplot, recast(scatter) ///
        plotopts(msymbol(Oh) msize(large) mlwidth(medthick)) ///
        ciopts(recast(rcap) lwidth(medthick)) ///
        title("") t1title("`head'", pos(12) ring(6) size(medium)) ///
        xtitle("Binary Split Indicator") ytitle("`ylab'") ///
        xsize(4) ysize(5) bgcolor(white) graphregion(fcolor(white)) ///
        saving(`stub'_balance.gph, replace)
}

* Two-column figure of all six balance panels.
graph combine ind_balance.gph schools_balance.gph investment_balance.gph ///
    pop_balance.gph area_balance.gph popdens_balance.gph, ///
    xsize(5) ysize(7) cols(2)
*graph export 1965_Balance_Tests.pdf, replace


*****************
* Summary Stats *
*****************
*sutex merged_informants district_split split_magnitude unrest_1976_bin, labels minmax ///
*title("Summary Statistics") key(tab:sumstat) file(Poland_Splits_Sumstats.tex) replace




***************************
* Binary Reform Indicator *
***************************
* Six variants of the same regression of merged_informants on the binary
* treatment terms with district and year fixed effects, standard errors
* clustered by district_id. Each is stored for the results table.
* (Local macros: run this section as a unit so the macros are defined.)
local bin_terms  binary_treatment split district_split
local bin_fe     i.district_id i.year
local bin_years  year>1956 & year<1981
local bin_clean  problem_district==0

* Baseline: non-problem districts, 1957-1980
regress merged_informants `bin_terms' ///
    `bin_fe' if `bin_clean' & `bin_years', vce(cluster district_id)
estimates store binary_1

* Predicted collaborators at binary_treatment = 0 and 1, saved as binary_1.gph
margins, at(binary_treatment=(0 1))
marginsplot, recast(scatter) ///
    plotopts(msymbol(Oh) msize(large) mlwidth(med)) ciopts(lwidth(med)) ///
    title("") t1title("Model 1.1", pos(12) ring(6) size(large)) ///
    xtitle("Binary Split Indicator", size(large)) xlabel(, labsize(large)) ///
    ytitle("Collaborators", size(large)) ylabel(, labsize(large)) ///
    xsize(5) ysize(4) bgcolor(white) graphregion(fcolor(white)) ///
    saving(binary_1.gph, replace)

* Include only 1969-1980
regress merged_informants `bin_terms' ///
    `bin_fe' if `bin_clean' & year>1968 & year<1981, vce(cluster district_id)
estimates store binary_2

* Include all districts including imperfectly matched in SE (no problem_district filter)
regress merged_informants `bin_terms' ///
    `bin_fe' if `bin_years', vce(cluster district_id)
estimates store binary_3

* Without districts split into 4
regress merged_informants `bin_terms' ///
    `bin_fe' if `bin_clean' & `bin_years' & split_magnitude<4, vce(cluster district_id)
estimates store binary_4

* Including control for population
regress merged_informants `bin_terms' ///
    ln_pop `bin_fe' if `bin_clean' & `bin_years', vce(cluster district_id)
estimates store binary_5

* Include unrest
regress merged_informants `bin_terms' ///
    unrest_1976_bin post_1976 unrest_post_1976 ///
    `bin_fe' if `bin_clean' & `bin_years', vce(cluster district_id)
estimates store binary_unrest_1


*********************
* Ordinal treatment *
*********************
* Regressions of merged_informants on the ord_treat* / split_ord* terms with
* district and year fixed effects, standard errors clustered by district_id.
* (Local macros: run this section as a unit so the macros are defined.)
local ord_terms  split ord_treat2 ord_treat3 ord_treat4 ord_treat5 ///
    split_ord2 split_ord3 split_ord4 split_ord5
local ord_fe     i.district_id i.year
local ord_years  year>1956 & year<1981
local ord_clean  problem_district==0

* All years
regress merged_informants `ord_terms' ///
    `ord_fe' ///
    if `ord_clean' & `ord_years', vce(cluster district_id)
estimates store ordinal_1

* Coefficient plot of the four split_ord* terms, saved as ordinal_1.gph
coefplot, keep(split_ord2 split_ord3 split_ord4 split_ord5) ///
    msymbol(Oh) msize(large) mlwidth(med) ciopts(recast(rcap) lwidth(med)) ///
    label yline(0) ///
    ytitle("Effect on Collaborators", size(large)) ylabel(, labsize(large)) ///
    xlabel(1 "(1)" 2 "(2)" 3 "(3)" 4 "(4)", labsize(large)) ///
    xtitle("Split Magnitude", size(large)) ///
    t1title("Model 1.7", pos(12) ring(6) size(large)) ///
    bgcolor(white) graphregion(fcolor(white)) xsize(5) ysize(4) vertical ///
    saving(ordinal_1.gph, replace)

* Binary and ordinal panels side by side (binary_1.gph must already be on disk)
graph combine binary_1.gph ordinal_1.gph, cols(2) xsize(9) ysize(5)
*graph export Poland_Split_Results_1.pdf, replace

* Same model written with factor-variable interactions; results are not stored
regress merged_informants i.split##i.split_magnitude ///
    `ord_fe' ///
    if `ord_clean' & `ord_years', vce(cluster district_id)


* Include only 1969-1980
regress merged_informants `ord_terms' ///
    `ord_fe' ///
    if `ord_clean' & year>1968 & year<1981, vce(cluster district_id)
estimates store ordinal_2

* Including control for population
regress merged_informants `ord_terms' ///
    ln_pop `ord_fe' ///
    if `ord_clean' & `ord_years', vce(cluster district_id)
estimates store ordinal_3




***********
* Table 1 *
***********

* Results table for the nine stored binary/ordinal models. The same
* coefficient list drives both keep() and order().
* (Local macros: run this section as a unit so the macros are defined.)
local t1_models binary_1 binary_2 binary_3 binary_4 binary_5 binary_unrest_1 ///
    ordinal_1 ordinal_2 ordinal_3
local t1_coefs  binary_treatment ///
    split_ord2 split_ord3 split_ord4 split_ord5 ln_pop unrest_post_1976
local t1_heads  `" "1957-80" "1969-80" "All Dist" "Split <4" "1957-80" "1957-80" "1957-80" "1969-80" "1957-80" "'

* Display the table in the Results window
esttab `t1_models', ///
    keep(`t1_coefs') ///
    order(`t1_coefs') ///
    mtitle(`t1_heads') ///
    star(* 0.10 ** 0.05 *** 0.01) scalars(N_clust) ///
    label compress nogaps se b(2)

* LaTeX export of the same table (disabled)
*esttab `t1_models' using Poland_Split_Results_1.tex, ///
*keep(`t1_coefs') ///
*order(`t1_coefs') ///
*mtitle(`t1_heads') ///
*star(* 0.10 ** 0.05 *** 0.01) scalars(N_clust) ///
*label compress nogaps se b(2) replace



********************************************
* Event Study: Binary Effects Through Time *
********************************************
* Each model interacts district_split with year dummies, adds district fixed
* effects and clusters standard errors by district_id.
* (Local macros: run this section as a unit so the macros are defined.)
local ev_rhs    i.district_split##i.year i.district_id
local ev_late   year>1968 & year<1981
local ev_clean  problem_district==0

* Include only 1969-1980
regress merged_informants `ev_rhs' ///
    if `ev_clean' & `ev_late', vce(cluster district_id)
estimates store event_1

* Include all districts including imperfectly matched in SE (no problem_district filter)
regress merged_informants `ev_rhs' ///
    if `ev_late', vce(cluster district_id)
estimates store event_2

* Without districts split into 4
regress merged_informants `ev_rhs' ///
    if `ev_clean' & `ev_late' & split_magnitude<4, vce(cluster district_id)
estimates store event_3

* Longer window. The condition year>1957 starts this sample in 1958, not
* 1957, whereas other sections use year>1956 for their full window.
* NOTE(review): confirm 1958 is the intended first year.
regress merged_informants `ev_rhs' ///
    if `ev_clean' & year>1957 & year<1981, vce(cluster district_id)
estimates store event_4


************
* Table A3 *
************
* Table of the district_split x year interactions for 1970-1980 from the four
* stored event-study models. The term list and its "Split*YYYY" labels are
* built in a loop rather than typed out.
* (Local macros: run this section as a unit so the macros are defined.)
local a3_terms
local a3_labels
forvalues yr = 1970/1980 {
    local a3_terms  `a3_terms' 1.district_split#`yr'.year
    local a3_labels `"`a3_labels' 1.district_split#`yr'.year "Split*`yr'" "'
}

* Display the table in the Results window
esttab event_1 event_2 event_3 event_4, ///
    keep(`a3_terms') ///
    order(`a3_terms') ///
    coeflabels(`a3_labels') ///
    mtitle("1969-80" "All Dist" "Split <4" "All Years") ///
    star(* 0.10 ** 0.05 *** 0.01) scalars(N_clust) ///
    label wide se b(2)

* LaTeX export of the same table (disabled)
*esttab event_1 event_2 event_3 event_4 using Poland_Event_Results.tex, ///
*keep(`a3_terms') ///
*order(`a3_terms') ///
*coeflabels(`a3_labels') ///
*mtitle("1969-80" "All Dist" "Split <4" "All Years") ///
*star(* 0.10 ** 0.05 *** 0.01) scalars(N_clust) ///
*label wide se b(2) replace

**********************************************************
* Figure 4: Coefficient plot for event study, Model A3.1 *
**********************************************************
* Re-estimate the 1969-1980 event-study model so its results are the active
* estimates, then plot the district_split x year interactions for 1970-1980.
regress merged_informants i.district_split##i.year i.district_id ///
    if problem_district==0 & year>1968 & year<1981, vce(cluster district_id)

* Build the list of interaction terms and their four-digit year labels.
* (Local macros: run this section as a unit so the macros are defined.)
local f4_terms
local f4_names
forvalues yr = 1970/1980 {
    local f4_terms `f4_terms' 1.district_split#`yr'.year
    local f4_names `"`f4_names' 1.district_split#`yr'.year = "`yr'" "'
}

coefplot, vertical keep(`f4_terms') ///
    xtitle(Year) coeflabels(`f4_names') ///
    yline(0, lwidth(thin)) ytitle(Effect of Split) ///
    msymbol(Oh) msize(large) mlwidth(med) ciopts(recast(rcap) lwidth(med))

*graph export Poland_Event_Coefs.pdf, replace

* Figure A5: Coefficient plot for event study, Model A3.4
* Re-estimate the long-window event-study model (year>1957, i.e. from 1958),
* then plot the district_split x year interactions for 1959-1980.
regress merged_informants i.district_split##i.year i.district_id ///
    if problem_district==0 & year>1957 & year<1981, vce(cluster district_id)

* Build the list of interaction terms and their two-digit year labels ("59" ... "80").
* (Local macros: run this section as a unit so the macros are defined.)
local a5_terms
local a5_names
forvalues yr = 1959/1980 {
    local yy = `yr' - 1900
    local a5_terms `a5_terms' 1.district_split#`yr'.year
    local a5_names `"`a5_names' 1.district_split#`yr'.year = "`yy'" "'
}

coefplot, vertical keep(`a5_terms') ///
    xtitle(Year) coeflabels(`a5_names') ///
    yline(0, lwidth(thin)) ytitle(Effect of Split) ylabel(-200(200)1000) ///
    msymbol(Oh) msize(large) mlwidth(med) ciopts(recast(rcap) lwidth(med))

*graph export Poland_Event_Coefs_ALL.pdf, replace